# _*_coding:utf-8_*_

import kNN
from numpy import *
import operator
from os import listdir

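'''
* Compute the distance between the current point and every point in the data set whose class is known
* Sort the points by increasing distance
* Select the K points with the smallest distance to the current point
* Determine how often each class occurs among those K points
* Return the most frequent class among those K points as the predicted class of the current point
'''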


def classify0(inX,dataSet,labels,k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Args:
        inX: the sample to classify, with one value per column of ``dataSet``.
        dataSet: 2-D NumPy array of known samples, one sample per row.
        labels: sequence indexed like the rows of ``dataSet`` giving the class
            of each known sample.
        k: number of nearest neighbours that take part in the vote.

    Returns:
        The label that occurs most often among the ``k`` rows of ``dataSet``
        with the smallest Euclidean distance to ``inX``.

    Raises:
        IndexError: if ``k`` is larger than the number of rows in ``dataSet``,
            or if ``k`` is not positive (no votes are cast).
    """
    dataSetSize = dataSet.shape[0]
    # Repeat inX once per row of dataSet so the subtraction is element-wise.
    diffMat = tile(inX,(dataSetSize,1))-dataSet
    sqDiffMat = diffMat**2
    # axis=1 sums the squared differences within each row: one value per sample.
    sqDistance = sqDiffMat.sum(axis=1)
    distance = sqDistance**0.5
    # Row indices ordered from the nearest sample to the farthest one.
    sortedDistIndicies = distance.argsort()
    classCount={}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        # Tally one vote for the class of the i-th nearest neighbour.
        classCount[voteIlabel] = classCount.get(voteIlabel,0) +1
    # dict.items() exists on both Python 2 and 3; iteritems() is Python 2 only
    # and raises AttributeError on Python 3.
    sortedClassCount = sorted(classCount.items(),key=operator.itemgetter(1),reverse=True)

    return sortedClassCount[0][0]



























